# Replication package for 
# "Civil War Mediation in the Shadow of IGOs: 
# The Path to Comprehensive Peace Agreements"
# ohannes Karreth, Jaroslav Tir, JJason Quinn, and Madhav Joshi
# Forthcoming at the Journal of Peace Research
# jkarreth@ursinus.edu

# Please read "0_readme.html" before using this code.

# This file is `analyze_mediation.R`
# It generates the main results in the article and in the supporting information.

# The full replication package (including source data) is posted on 
# Dataverse: https://dataverse.harvard.edu/dataverse/jkarreth

## Unit of obs.: (1) one conflict-year per UCDP/PRIO
##               starting in 1989, since PAM data start then
##               (2) one conflict

####################################
### Set up data and packages
####################################

rm(list = ls())

setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

library("texreg")
library("modelsummary")
library("marginaleffects")
library("xtable")
library("viridis")
library("ggridges")
library("GJRM")
library("estimatr")
library("MAMI")
library("Zelig") # install from source, not on CRAN anymore
library("tidyverse")

theme_jk <- function(base_size = 11)
{
  theme_bw(base_size = base_size) %+replace%
    theme(
      strip.background = element_rect(fill = NA, color = NA, linewidth = 0),
      strip.text.x = element_text(size = rel(1.25), margin = margin(t = base_size/2 * 0.6, b = base_size/2 * 0.6), face = "bold"),
      panel.border = element_rect(fill = NA, color = "black", linewidth = 0.75),
      plot.title = element_text(size = rel(1.2), margin = margin(b = base_size/2 * 1.2), face = "bold", hjust = 0.5),
      plot.subtitle = element_text(size = rel(1.2), margin = margin(b = base_size/2 * 1.2), hjust = 0.5),
      plot.title.position = "plot",
      panel.grid.major = element_line(colour = "grey90", linewidth = 0.4, linetype = 3), 
      panel.grid.minor = element_line(colour = "grey85", linewidth = 0.2, linetype = 3),
      axis.text = element_text(color = "black")
    )
}

meanstd <- function(x){
  (x - mean(x, na.rm = TRUE)) / (1 * sd(x, na.rm = TRUE))
}

#################################
### Tests at the conflict-year level
#################################

d <- rio::import("Data/HSIGO-CPA_cy.csv")
d$hsigo_other_lag <- d$hsigo_ipol_lag - d$hligo_ipol_lag
d$region23 <- countrycode::countrycode(sourcevar = d$cowcode, origin = "cown", destination = "region23")

# add time counter
d_h1_m1_cy <- d[d$cwm_sample == 1 | d$mic_sample == 1, ]
d_h1_m1_cy_withspell <- pltesim::btscs(df = d_h1_m1_cy, event = "mediation", t_var = "Year", cs_unit = "ID_ucdp")
d_h1_m1_cy_withspell$spell_time2 <- d_h1_m1_cy_withspell$spell_time^2
d_h1_m1_cy_withspell$spell_time3 <- d_h1_m1_cy_withspell$spell_time^3

d_h1_m1_cy <- d_h1_m1_cy_withspell

#################################
### H1a/b: Domestic armed conflicts in countries that are more involved in 
### IGOs with high economic leverage receive more/less attention from mediators.
#################################

# Verify the right conflicts are associated with the right CPAs
View(arrange(d[d$cwm_sample == 1 | d$mic_sample == 1, c("cowcode", "Year", "ID_ucdp_year", "mediation_cumsum", "pam_agneg2", "pam_caseid", "ucdp_IntensityLevel")], ID_ucdp_year))

# Unit of analysis: conflict-year
# h1_m1_med: y = mediation (either CWM or MIC mediation occurrence in given year)

summary(h1_m1_med_1_lev <- glm(mediation ~ hligo_ipol_lag + hsigo_other_lag + 
                                 bdbest_lag_log + mediation_cumsum_log_lag + conflictyears_log + territorial_conflict + gems_drugs_oil +
                                 population_log_lag + vdem_polyarchy_ctr_lag + gdppc2005_log_lag +  ai_df_std + UNPKO_yes_lag + pivv_us_std + 
                                 spell_time + spell_time2 + spell_time3, 
                               data = d_h1_m1_cy, family = binomial(link = "probit")))

coefmap <- c("hligo_ipol_lag" = "IGOs with high economic leverage",
             "hsigo_other_lag" = "All other HSIGOs",
             "bdbest_lag_log" = "Battle deaths in prior year (logged)",
             "mediation_cumsum_log_lag" = "Prior years with mediation (logged)",
             "conflictyears_log" = "Conflict duration (years logged)",
             "territorial_conflict" = "Territorial conflict",
             "gems_drugs_oil" = "Natural resources",
             "population_log_lag" = "Population (logged)",
             "vdem_polyarchy_ctr_lag" = "Democracy",
             "gdppc2005_log_lag" = "GDP per capita (logged)",
             "ai_df_std" = "Trade globalization",
             "UNPKO_yes_lag" = "UNPKO",
             "pivv_us_std" = "UNGA proximity to US",
             "spell_time" = "Time to event",
             "spell_time2" = "Time to event (squared)",
             "spell_time3" = "Time to event (cubed)",
             "(Intercept)" = "Intercept")

modelsummary(h1_m1_med_1_lev, 
             output = "Output/Tables/h1_m1_med_1_lev.tex",
             estimate = "estimate",
             statistic = "std.error",
             vcov = ~ region23, 
             stars = c("*" = .05),
             coef_map = coefmap,
             gof_omit = "AIC|BIC|RMSE",
             # align = "c",
             notes = "Standard errors, clustered by region, in parentheses.",
             title = "Evidence on H1: Probit estimates of mediation incidents, 1989--2011. Unit of analysis: conflict-year."
)

# predictions for IC comparison

effects::Effect("hligo_ipol_lag",
       mod = h1_m1_med_1_lev, 
       xlevels = list(hligo_ipol_lag = c(1:10)))

# with standardized coefficients instead - for plotting later

h1_m1_med_1_lev_stdcoef <- parameters::standardize_parameters(h1_m1_med_1_lev)

# Imputation
# using Zelig version 5.1.7 installed from local source

set.seed(123)
h1_m1_med_1_imp <- Amelia::amelia(x = data.frame(dplyr::select(d_h1_m1_cy, ID_ucdp_year, cowcode, Year, mediation, 
                                                               hligo_ipol_lag, hsigo_other_lag, 
                                                               bdbest_lag_log, mediation_cumsum_log_lag, conflictyears_log, territorial_conflict, 
                                                               gems_drugs_oil, exclpop_std, population_log_lag, democracy6_lag, chga_demo_lag, vdem_polyarchy_ctr_lag, 
                                                               gdppc2005_log_lag, tradegdp01_lag, fdi_stock_log_lag, ai_df_std, aii_df_std, aid_commitment_lag_mill_std, 
                                                               ext_int_mapo, UNPKO_yes_lag, pivv_us_std,
                                                               spell_time, spell_time2, spell_time3)), 
                                  cs = "cowcode", ts = "Year", idvars = c("ID_ucdp_year"),
                                  m = 5)

h1_m1_med_lev_1_zelig <- Zelig::zelig(mediation ~ hligo_ipol_lag + hsigo_other_lag +
                                        bdbest_lag_log + mediation_cumsum_log_lag + conflictyears_log + territorial_conflict + gems_drugs_oil +
                                        population_log_lag + vdem_polyarchy_ctr_lag + gdppc2005_log_lag +  ai_df_std + 
                                        UNPKO_yes_lag + pivv_us_std + 
                                        spell_time + spell_time2 + spell_time3, 
                                      model = "probit", data = h1_m1_med_1_imp)

texreg(list(Zelig::from_zelig_model(h1_m1_med_lev_1_zelig)[[1]]),
       file = "Output/Tables/h1_m1_med_1_lev_imp.tex",
       single.row = FALSE,
       stars = c(0.05),
       custom.coef.names = c("Intercept", "IGOs with high economic leverage", "Other HSIGOs", "Battle deaths in prior year (logged)", "Prior years with mediation (logged)", 
                             "Conflict duration (years logged)", "Territorial conflict", "Natural resources", 
                             "Population (logged)", "Democracy", "GDP per capita (logged)", 
                             "Trade globalization", "UNPKO", "UNGA proximity to US",
                             "Time to event", "Time to event squared", "Time to event cubed"),
       override.coef = Zelig::combine_coef_se(h1_m1_med_lev_1_zelig)[, 1], 
       override.se = Zelig::combine_coef_se(h1_m1_med_lev_1_zelig)[, 2], 
       override.pvalues = Zelig::combine_coef_se(h1_m1_med_lev_1_zelig)[, 4],
       reorder.coef = c(2:length(h1_m1_med_lev_1_zelig$zelig.out$z.out[[1]]$coefficients), 1),
       center = TRUE,
       caption = "Evidence on H1, using IGOs with high economic leverage: Probit estimates of mediation incidents, 1989--2011. Unit of analysis: conflict-year. Estimates after multiple imputation.",
       caption.above = TRUE,
       custom.note = "\\$p < 0.05\\$. Standard errors in parentheses.", # https://stackoverflow.com/questions/30902906/wrapping-custom-notes-in-texreg-output
       label = "tab:h1_m1_med_lev_1_imp",
       booktabs = TRUE,
       dcolumn = TRUE,
       use.packages = FALSE,
       fontsize = "scriptsize")

# BMA

# with HLIGOs and HSIGOs

bma_dat <- data.frame(dplyr::select(d_h1_m1_cy, cowcode, Year, mediation, hligo_ipol_lag, hsigo_other_lag,
                                    bdbest_lag_log, mediation_cumsum_log_lag, conflictyears_log, territorial_conflict, gems_drugs_oil, exclpop_std, 
                                    population_log_lag, vdem_polyarchy_ctr_lag, gdppc2005_log_lag, fdi_stock_log_lag,
                                    ai_df_std, aid_commitment_lag_mill_std, ext_int_mapo, UNPKO_yes_lag, pivv_us_std))

colnames(bma_dat)[4:20] <- c("Memberships in IGOs with\nhigh economic leverage", "Other HSIGOs", "Battle deaths in prior year\n(logged)", "Prior years with mediation\n(logged)", 
                             "Conflict duration\n(years logged)", "Territorial conflict", "Natural resources", 
                             "Share of\nexcluded population", "Population (logged)", "Democracy", "GDP per capita\n(logged)", 
                             "FDI stock", "Trade globalization", "Aid commitments", "External intervention\nby powerful state", "UNPKO", "UNGA proximity to US")

med_bma <- BMA::bic.glm(y = bma_dat[, 3], x = bma_dat[, c(4:20)], 
                        glm.family = binomial(link = "probit"))

pdf("Output/Figures/h1_bma_hligos.pdf", width = 8, height = 9)
plot(med_bma, mfrow = c(5, 4))
dev.off()

# LASSO using MAMI instead

med_lasso <- MAMI::mami(X = bma_dat,
                        missing.data = "CC",
                        model = "binomial",
                        outcome = "mediation",
                        method = "LASSO",
                        inference = "+boot",
                        var.remove = c("cowcode", "Year"))

summary(med_lasso)

# Remove intercept for plotting
med_lasso$coefficients.s <- med_lasso$coefficients.s[-1, ]
med_lasso$coefficients.boot.s <- med_lasso$coefficients.boot.s[-1, ]
med_lasso$boot.results[[1]] <- med_lasso$boot.results[[1]][, -1]

rownames(med_lasso$coefficients.s) <- c("Memberships in IGOs with\nhigh economic leverage", "Other HSIGOs", "Battle deaths in prior year (logged)", "Prior years with mediation (logged)", 
                                        "Conflict duration (years logged)", "Territorial conflict", "Natural resources", 
                                        "Share of excluded population", "Population (logged)", "Democracy", "GDP per capita (logged)", 
                                        "FDI stock", "Trade globalization", "Aid commitments", "External intervention by powerful state", "UNPKO", "UNGA proximity to US")
rownames(med_lasso$coefficients.boot.s) <- c("Memberships in IGOs with\nhigh economic leverage", "HSIGO memberships", "Battle deaths in prior year (logged)", "Prior years with mediation (logged)", 
                                             "Conflict duration (years logged)", "Territorial conflict", "Natural resources", 
                                             "Share of excluded population", "Population (logged)", "Democracy", "GDP per capita (logged)", 
                                             "FDI stock", "Trade globalization", "Aid commitments", "External intervention by powerful state", "UNPKO", "UNGA proximity to US")
colnames(med_lasso$boot.results[[1]]) <- c("Memberships in IGOs with\nhigh economic leverage", "Other HSIGOs", "Battle deaths in prior year (logged)", "Prior years with mediation (logged)", 
                                           "Conflict duration (years logged)", "Territorial conflict", "Natural resources", 
                                           "Share of excluded population", "Population (logged)", "Democracy", "GDP per capita (logged)", 
                                           "FDI stock", "Trade globalization", "Aid commitments", "External intervention by powerful state", "UNPKO", "UNGA proximity to US")

pdf("Output/Figures/h1_lasso.pdf", width = 12, height = 12)
plot(med_lasso, plots.p.page = "9", ask = FALSE, color = c("yellow", "purple4"))
dev.off()

# Coverage:

h1_coverage <- model.frame(mediation ~ hligo_ipol_lag + cowcode + Year + ID_ucdp_year, data = d[d$cwm_sample == 1 | d$mic_sample == 1, ])
summary(h1_coverage$Year)
length(unique(h1_coverage$ID_ucdp_year))

# did all CPAs indeed get mediation at some point?
h1_data <- d[d$cwm_sample == 1 | d$mic_sample == 1, c("mediation", "pam_agneg2", "pam_caseid")]
table(h1_data$mediation, h1_data$pam_agneg2)

h1_m1_med_0.data <- model.frame(mediation ~ hligo_ipol_lag + hsigo_other_lag + cowcode + Year + ID_ucdp_year, data = d[d$cwm_sample == 1 | d$mic_sample == 1, ])
summary(h1_m1_med_0.data$Year)

h1_m1_med_1.data <- model.frame(mediation ~ hligo_ipol_lag + hsigo_other_lag +
                                  + bdbest_lag_log + mediation_cumsum_log_lag + conflictyears_log + territorial_conflict + gems_drugs_oil +
                                  + population_log_lag + vdem_polyarchy_ctr_lag + gdppc2005_log_lag +  ai_df_std + UNPKO_yes_lag + pivv_us_std + cowcode + Year + ID_ucdp_year, data = d[d$cwm_sample == 1 | d$mic_sample == 1, ])

summary(h1_m1_med_1.data$Year)
table(data.frame(table(h1_m1_med_1.data$ID_ucdp_year))$Freq)
# Only one case per conflict-year - OK

# Summary statistics

h1_cy_sumstats <- dplyr::select(dplyr::filter(d_h1_m1_cy, (cwm_sample == 1 | mic_sample == 1) & is.na(mediation) == FALSE & is.na(hligo_ipol_lag) == FALSE),
                                `Mediation in given year` = mediation,
                                `Memberships in IGOs with high economic leverage` = hligo_ipol_lag,
                                `Memberships in other HSIGOs` = hsigo_other_lag,
                                `Battle deaths in prior year (logged)` = bdbest_lag_log,
                                `Prior years with mediation (logged)` = mediation_cumsum_log_lag,
                                `Conflict duration (years logged)` = conflictyears_log,
                                `Territorial conflict` = territorial_conflict,
                                `Natural resources` = gems_drugs_oil,
                                `Population in prior year (logged)` = population_log_lag,
                                `Democracy in prior year` = vdem_polyarchy_ctr_lag,
                                `GDP per capita in prior year (logged)` = gdppc2005_log_lag,
                                `Trade globalization` = ai_df_std,
                                `UNPKO` = UNPKO_yes_lag,
                                `UNGA proximity to US` = pivv_us_std,
                                `Time to mediation` = spell_time,
                                `COW code` = cowcode,
                                `Year` = Year)

h1_cy_sumstats <- as.data.frame(h1_cy_sumstats)
h1_cy_sumstats <- dplyr::select(h1_cy_sumstats, -`COW code`, -`Year`)

datasummary(formula = All(h1_cy_sumstats) ~ Mean + SD + Min + Median + Max + N,
            data = h1_cy_sumstats,
            output = "Output/Tables/h1_m1_med_1_sumstats_all.tex",
            title = "Summary statistics for analyses of mediation at the conflict-year; all observations.",
            notes = "")

h1_cy_m1_sumstats <- dplyr::select(dplyr::filter_at(d_h1_m1_cy, vars((colnames(model.frame(h1_m1_med_1_lev)))), all_vars(!is.na(.) & (cwm_sample == 1 | mic_sample == 1))),
                                   `Mediation in given year` = mediation,
                                   `Memberships in IGOs with high economic leverage` = hligo_ipol_lag,
                                   `Memberships in other HSIGOs` = hsigo_other_lag,
                                   `Battle deaths in prior year (logged)` = bdbest_lag_log,
                                   `Prior years with mediation (logged)` = mediation_cumsum_log_lag,
                                   `Conflict duration (years logged)` = conflictyears_log,
                                   `Territorial conflict` = territorial_conflict,
                                   `Natural resources` = gems_drugs_oil,
                                   `Population in prior year (logged)` = population_log_lag,
                                   `Democracy in prior year` = vdem_polyarchy_ctr_lag,
                                   `GDP per capita in prior year (logged)` = gdppc2005_log_lag,
                                   `Trade globalization` = ai_df_std,
                                   `UNPKO` = UNPKO_yes_lag,
                                   `UNGA proximity to US` = pivv_us_std,
                                   `Time to mediation` = spell_time,
                                   `COW code` = cowcode,
                                   `Year` = Year)

h1_cy_m1_sumstats <- as.data.frame(h1_cy_m1_sumstats)
h1_cy_m1_sumstats <- dplyr::select(h1_cy_m1_sumstats, -`COW code`, -`Year`)

datasummary(formula = All(h1_cy_m1_sumstats) ~ Mean + SD + Min + Median + Max + N,
            data = h1_cy_m1_sumstats,
            output = "Output/Tables/h1_m1_med_1_sumstats_sample.tex",
            title = "Summary statistics for analyses of mediation at the conflict-year; estimation sample only.",
            notes = "")

# Effect plots

p_h1_m1_med_1_lev <- predictions(
  mod = h1_m1_med_1_lev,
  type = "response",
  vcov = ~ region23,
  conf_level = 0.834,
  newdata = datagrid(hligo_ipol_lag = 1:10, grid_type = "counterfactual"))
p_h1_m1_med_1_lev_df <- tidy(p_h1_m1_med_1_lev, by = "hligo_ipol_lag", conf_level = 0.834)

h1_m1_med_1_lev_rug <- data.frame(hligo_ipol_lag = model.frame(h1_m1_med_1_lev)[, 2])
h1_m1_med_1_lev_rug_long <- pivot_longer(data = h1_m1_med_1_lev_rug, cols = everything(), 
                                         names_to = "variable", 
                                         values_to = "value")
h1_m1_med_1_lev_rug_long <- dplyr::summarize(group_by(h1_m1_med_1_lev_rug_long, value), 
                                             count = n())

h1_m1_med_1_lev.eff.p <- ggplot(data = p_h1_m1_med_1_lev_df, 
                               aes(x = hligo_ipol_lag, y = estimate)) +
  geom_segment(aes(y = conf.low, yend = conf.high, xend = hligo_ipol_lag), alpha = 1) + 
  geom_line(aes(group = 1), linewidth = 0.5, lineend = "round") + 
  geom_point(color = "black") +
  xlab("Memberships in IGOs with high economic leverage") + 
  ylab("Probability of mediation in a given year") +
  ylim(0, 1) + 
  scale_x_continuous(breaks = c(0, 5, 10), limits = c(0, 10)) +
  theme_jk()

h1_m1_med_1_lev.hist.p <- ggplot(h1_m1_med_1_lev_rug_long, aes(x = value, y = count)) +   
  geom_bar(position = "dodge", stat = "identity", fill = "black") + 
  scale_color_manual(values = c(rep("white", 21))) + 
  guides(color = "none") + 
  xlab("Memberships in IGOs with high economic leverage") + 
  ylab("Count") + 
  scale_x_continuous(breaks = c(0, 5, 10), limits = c(0, 10)) +
  theme_jk()

### Collapse to conflicts as level of analysis

d_c <- rio::import("Data/HSIGO-CPA_c.csv")
d_c$hsigo_other_y0 <- d_c$hsigo_ipol_y0 - d_c$hligo_ipol_y0
d_c$region23 <- countrycode::countrycode(sourcevar = d_c$cowcode, origin = "cown", destination = "region23")

# These samples contain all conflicts where:
#   
# - a PA was negotiated, or
# - a conflict episode ended per UCDP/PRIO, or
# - a conflict exits UCDP/PRIO without being coded as having ended.
# 
# While the UCDP/PRIO data in our period of interest (1989-2014) only lists 139 separate conflicts, some conflicts are split into different episodes. I treat each episode as "eligible" for PA negotiation. Hence the sample here is up to 225 observations rather than 139.

d_h1_c <- d_c[d_c$cwm_sample == 1 | d_c$mic_sample == 1, ]

summary(h1_c_m1_med_1_lev <- MASS::glm.nb(mediation_cumsum ~ hligo_ipol_y0 + hsigo_other_y0 +
                                      bdbest_sum_log + conflictyears_log + territorial_conflict + gems_drugs_oil + 
                                      population_log_y0 + vdem_polyarchy_ctr_y0 + gdppc2005_log_y0 + 
                                      ai_df_std_y0 + pivv_us_std_y0, 
                                    data = d_h1_c))

coefmap <- c("hligo_ipol_y0" = "IGOs with high economic leverage before conflict",
             "hsigo_other_y0" = "All other HSIGOs before conflict",
             "hsigo_minus_hligo_y0" = "All other HSIGOs before conflict",
             "I(msigo_ipol_y0 + lsigo_ipol_y0)" = "All other IGOs",
             "bdbest_sum_log" = "Battle deaths throughout conflict (logged)",
             "mediation_cumsum_log_lag" = "Prior years with mediation (logged)",
             "conflictyears_log" = "Conflict duration (years logged)",
             "territorial_conflict" = "Territorial conflict",
             "gems_drugs_oil" = "Natural resources",
             "population_log_y0" = "Population before conflict (logged)",
             "vdem_polyarchy_ctr_y0" = "Democracy before conflict",
             "gdppc2005_log_y0" = "GDP per capita before conflict (logged)",
             "ai_df_std_y0" = "Trade globalization before conflict",
             "pivv_us_std_y0" = "UNGA proximity to US before conflict",
             "hligo_ipol_region_y0" = "Instrument for IGOs with high economic leverage before conflict",
             "(Intercept)" = "Intercept")

modelsummary(h1_c_m1_med_1_lev, 
             output = "Output/Tables/h1_c_m1_med_1_lev.tex",
             estimate = "estimate",
             statistic = "std.error",
             vcov = ~ region23, 
             stars = c("*" = .05),
             coef_map = coefmap,
             gof_omit = "AIC|BIC|RMSE",
             # align = "c",
             notes = "Standard errors, clustered by region, in parentheses.",
             title = "Evidence on H1: Negative binomial estimates of mediation incidents, 1989--2011. Unit of analysis: conflict."
)

# Effect

effects::Effect("hligo_ipol_y0",
                mod = h1_c_m1_med_1_lev, 
                xlevels = list(hligo_ipol_y0 = c(1:10)))

# standardized coefficients

h1_c_m1_med_1_lev_stdcoef <- as.data.frame(parameters::standardize_parameters(h1_c_m1_med_1_lev))
h1_m1_med_1_lev_stdcoef <- as.data.frame(parameters::standardize_parameters(h1_m1_med_1_lev))

# plot ... as facets

h1_c_m1_med_1_lev_stdcoef <- dplyr::filter(h1_c_m1_med_1_lev_stdcoef, Parameter != "(Intercept)")
h1_m1_med_1_lev_stdcoef <- dplyr::filter(h1_m1_med_1_lev_stdcoef, Parameter != "(Intercept)" & Parameter != "spell_time" & Parameter != "spell_time2" & Parameter != "spell_time3")

h1_c_m1_med_1_lev_stdcoef$Parameter <- c("IGOs with high economic leverage", "All other HSIGOs", "Battle deaths", "Conflict duration", 
                                         "Territorial conflict", "Natural resources", "Population (logged)", 
                                         "Democracy", "GDP per capita", "Trade globalization", 
                                         "UNGA proximity to US")
h1_c_m1_med_1_lev_stdcoef$ypos <- c(13:3)

h1_m1_med_1_lev_stdcoef$Parameter <- c("IGOs with high economic leverage", "All other HSIGOs", "Battle deaths", "Prior years with mediation", 
                                       "Conflict duration", "Territorial conflict", "Natural resources", "Population (logged)", 
                                       "Democracy", "GDP per capita", "Trade globalization", 
                                       "UNPKO", "UNGA proximity to US")
h1_m1_med_1_lev_stdcoef$ypos <- c(13:11, 2, 10:4, 1, 3)

h1_c_m1_med_1_lev_stdcoef$`Unit of analysis` <- "y: Mediation events\nduring conflict\n(Negative binomial estimates)"
h1_m1_med_1_lev_stdcoef$`Unit of analysis` <- "y: Mediation in a\ngiven conflict-year\n(Probit estimates)"

h1_std_coef <- rbind(h1_c_m1_med_1_lev_stdcoef, h1_m1_med_1_lev_stdcoef)

h1_std_coef_p <- ggplot(h1_std_coef, aes(x = Std_Coefficient, y = reorder(Parameter, ypos))) + 
  geom_abline(intercept = 1:13, slope = 0, color = "gray") + 
  geom_vline(xintercept = 0, color = "gray", linetype = "dashed") + 
  # geom_segment(aes(x = CI_low, xend = CI_high, yend = reorder(Parameter, ypos)), size = 2, lineend = "round") + 
  geom_segment(aes(x = CI_low, xend = CI_high, yend = reorder(Parameter, ypos)), linewidth = 1.25) + 
  # geom_point(color = "white") + 
  geom_point(shape = 21, size = 3, fill = "black") + 
  facet_wrap(~ `Unit of analysis`, scales = "free_x") + 
  labs(x = "Standardized coefficient (outcome: mediation)", y = "",
       caption = "Standardized coefficients show the predicted difference in g(y) given a one-standard deviation difference in the explanatory variable.\nWhiskers show 95% confidence intervals based on standard errors clustered by region.") + 
  theme_jk() + theme(axis.line.y = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ggsave(h1_std_coef_p, file = "Output/Figures/h1_m1_med_1_lev3_stdcoef.pdf", width = 8, height = 4)
ggsave(h1_std_coef_p, file = "Output/Figures/h1_m1_med_1_lev3_stdcoef.eps", width = 8, height = 4)
ggsave(h1_std_coef_p, filename = "Output/Figures/h1_m1_med_1_lev3_stdcoef.jpg",
       device = "jpg", dpi = 350,
       width = 8, height = 4, units = "in")

# Predictions for IC comparison

d_h1_c$hsigo_minus_hligo_y0 <- d_h1_c$hsigo_ipol_y0 - d_h1_c$hligo_ipol_y0
h1_c_m1_med_1_lev_ic <- MASS::glm.nb(mediation_cumsum ~ hligo_ipol_y0 + hsigo_other_y0 +
                                 bdbest_sum_log + conflictyears_log + territorial_conflict + gems_drugs_oil + 
                                 population_log_y0 + vdem_polyarchy_ctr_y0 + gdppc2005_log_y0 + 
                                 ai_df_std_y0 + pivv_us_std_y0, 
                               data = d_h1_c)

effects::Effect("hligo_ipol_y0",
       mod = h1_c_m1_med_1_lev_ic, 
       xlevels = list(hligo_ipol_y0 = c(3, 5, 8)),
       fixed.predictors = list(hsigo_minus_hligo_y0 = median(model.frame(h1_c_m1_med_1_lev_ic)$hsigo_minus_hligo_y0, na.rm = TRUE),
                               bdbest_sum_log = median(model.frame(h1_c_m1_med_1_lev_ic)$bdbest_sum_log, na.rm = TRUE), 
                               conflictyears_log = median(model.frame(h1_c_m1_med_1_lev_ic)$conflictyears_log, na.rm = TRUE), 
                               territorial_conflict = median(model.frame(h1_c_m1_med_1_lev_ic)$territorial_conflict, na.rm = TRUE), 
                               gems_drugs_oil = median(model.frame(h1_c_m1_med_1_lev_ic)$gems_drugs_oil, na.rm = TRUE), 
                               population_log_y0 = median(model.frame(h1_c_m1_med_1_lev_ic)$population_log_y0, na.rm = TRUE), 
                               vdem_polyarchy_ctr_y0 = median(model.frame(h1_c_m1_med_1_lev_ic)$vdem_polyarchy_ctr_y0, na.rm = TRUE), 
                               gdppc2005_log_y0 = median(model.frame(h1_c_m1_med_1_lev_ic)$gdppc2005_log_y0, na.rm = TRUE), 
                               ai_df_std_y0 = median(model.frame(h1_c_m1_med_1_lev_ic)$ai_df_std_y0, na.rm = TRUE), 
                               pivv_us_std_y0 = median(model.frame(h1_c_m1_med_1_lev_ic)$pivv_us_std_y0, na.rm = TRUE)))

# linear model instead

summary(h1_c_m1_med_1_lm_lev <- lm(mediation_cumsum_log ~ hligo_ipol_y0 + hsigo_other_y0 +
                                     bdbest_sum_log + conflictyears_log + territorial_conflict + gems_drugs_oil + 
                                     population_log_y0 + vdem_polyarchy_ctr_y0 + gdppc2005_log_y0 + 
                                     ai_df_std_y0 + pivv_us_std_y0, 
                                   data = d_h1_c))

modelsummary(h1_c_m1_med_1_lm_lev, 
             output = "Output/Tables/h1_c_m1_med_1_lev_lm.tex",
             estimate = "estimate",
             statistic = "std.error",
             vcov = ~ region23, 
             stars = c("*" = .05),
             coef_map = coefmap,
             gof_omit = "AIC|BIC|RMSE",
             # align = "c",
             notes = "Standard errors, clustered by region, in parentheses.",
             title = "Evidence on H1: OLS estimates of mediation incidents, 1989--2011. Unit of analysis: conflict."
)

### Model IGO memberships via instrument 

d_c_iv <- d_c[d_c$cwm_sample == 1 | d_c$mic_sample == 1, ]

summary(h1_c_m1_med_1_hligo_iv <- iv_robust(mediation_cumsum_log ~ hligo_ipol_y0 + 
                                              bdbest_sum_log + conflictyears_log + territorial_conflict + gems_drugs_oil + 
                                              population_log_y0 + vdem_polyarchy_ctr_y0 + gdppc2005_log_y0 + 
                                              ai_df_std_y0 + pivv_us_std_y0 | hligo_ipol_region_y0  + 
                                              bdbest_sum_log + conflictyears_log + territorial_conflict + gems_drugs_oil + 
                                              population_log_y0 + vdem_polyarchy_ctr_y0 + gdppc2005_log_y0 + 
                                              ai_df_std_y0 + pivv_us_std_y0, 
                                            data = d_c[d_c$cwm_sample == 1 | d_c$mic_sample == 1, ],
                                            clusters = region23,
                                            se_type = "CR2",
                                            diagnostics = TRUE))

h1_c_m1_med_1_hligo_iv$diagnostic_first_stage_fstatistic
h1_c_m1_med_1_hligo_iv$diagnostic_endogeneity_test
h1_c_m1_med_1_hligo_iv$diagnostic_overid_test

modelsummary(h1_c_m1_med_1_lm_lev, 
             output = "Output/Tables/h1_c_m1_med_1_lev_iv.tex",
             estimate = "estimate",
             statistic = "std.error",
             vcov = ~ region23, 
             stars = c("*" = .05),
             coef_map = coefmap,
             gof_omit = "AIC|BIC|RMSE",
             # align = "c",
             notes = "Standard errors, clustered by region, in parentheses.",
             title = "Evidence on H1: Instrumental variable estimates of total mediation years (logged), 1989--2011. Unit of analysis: conflict."
)

# to show first stage, use lm directly:

summary(h1_c_m1_med_1_hligo_iv_felm_iv_firststage <- lm(hligo_ipol_y0 ~ 
                                                          hligo_ipol_region_y0 + bdbest_sum_log + conflictyears_log + territorial_conflict + gems_drugs_oil + 
                                                          population_log_y0 + vdem_polyarchy_ctr_y0 + gdppc2005_log_y0 + 
                                                          ai_df_std_y0 + pivv_us_std_y0,                                                  ,
                                                        data = d_c[d_c$cwm_sample == 1 | d_c$mic_sample == 1, ]))

modelsummary(h1_c_m1_med_1_hligo_iv_felm_iv_firststage, 
             output = "Output/Tables/h1_c_m1_med_1_lev_iv_firststage.tex",
             estimate = "estimate",
             statistic = "std.error",
             vcov = ~ region23, 
             stars = c("*" = .05),
             coef_map = coefmap,
             gof_omit = "AIC|BIC|RMSE",
             # align = "c",
             notes = "Standard errors, clustered by region, in parentheses.",
             title = "First stage estimates of instrumental variable results from Table XXX. Unit of analysis: conflict."
)

# FYI, same results in Stata

# rio::export(dplyr::select(d_c[d_c$cwm_sample == 1 | d_c$mic_sample == 1, ], 
#                           mediation_cumsum_log, hligo_ipol_y0, hligo_ipol_region_y0,
#                             bdbest_sum_log, conflictyears_log, territorial_conflict, gems_drugs_oil, 
#                             population_log_y0, vdem_polyarchy_ctr_y0, gdppc2005_log_y0, 
#                             ai_df_std_y0, pivv_us_std_y0, cowcode, Year, region23), 
#             file = "Tables/iv_stata.dta", version = 13)

# ivregress 2sls mediation_cumsum_log bdbest_sum_log conflictyears_log territorial_conflict gems_drugs_oil population_log_y0 vdem_polyarchy_ctr_y0 gdppc2005_log_y0 ai_df_std_y0 pivv_us_std_y0 (hligo_ipol_y0 = hligo_ipol_region_y0), vce(cluster region23) first small
# 
# estat endogenous
# estat firststage

# Imputation

set.seed(201907)
h1_c_m1_med_1_imp <- Amelia::amelia(x = data.frame(dplyr::select(d_h1_c, cowcode, mediation_cumsum, 
                                                                 hligo_ipol_y0, hsigo_other_y0,
                                                                 bdbest_sum_log, conflictyears_log, territorial_conflict, 
                                                                 gems_drugs_oil, exclpop_std, population_log_y0, vdem_polyarchy_ctr_y0, 
                                                                 gdppc2005_log_y0, tradegdp01_y0, fdi_stock_log_y0, ai_df_std_y0, aid_commitment_mill_y0, 
                                                                 ext_int_mapo, pivv_us_std_y0)), 
                                    cs = "cowcode", id.vars = "ID_ucdp", startvals = 1)


# HLIGOs

print(h1_c_m1_med_1_lev_zelig <- Zelig::zelig(formula(h1_c_m1_med_1_lev), 
                                          model = "negbin", data = h1_c_m1_med_1_imp))

texreg(list(Zelig::from_zelig_model(h1_c_m1_med_1_lev_zelig)[[1]]),
       file = "Output/Tables/h1_c_m1_med_1_lev_imp.tex",
       override.coef = Zelig::combine_coef_se(h1_c_m1_med_1_lev_zelig)[, 1], 
       override.se = Zelig::combine_coef_se(h1_c_m1_med_1_lev_zelig)[, 2], 
       override.pvalues = Zelig::combine_coef_se(h1_c_m1_med_1_lev_zelig)[, 4],
       stars = c(0.1),
       digits = 3,
       custom.coef.names = c("Intercept", "IGOs with high economic leverage before conflict", "All other HSIGOs before conflict",
                             "Battle deaths throughout conflict (logged)", 
                             "Conflict duration (years logged)", "Territorial conflict", "Natural resources", 
                             "Population before conflict (logged)", "Democracy before conflict", "GDP per capita before conflict (logged)", 
                             "Trade globalization before conflict", "UNGA proximity to US before conflict"),
       reorder.coef = c(2:length(h1_c_m1_med_1_lev$coefficients), 1),
       center = TRUE,
       caption = "Evidence on H1: Negative binomial estimates of total mediation years, 1989--2011. Unit of analysis: conflict. Estimates after multiple imputation.",
       caption.above = TRUE,
       custom.note = "\\$p < 0.05\\$, one-tailed tests. Standard errors in parentheses.",
       label = "tab:h1_c_m1_med_1_lev_imp",
       booktabs = TRUE,
       dcolumn = TRUE,
       use.packages = FALSE,
       fontsize = "scriptsize")

# Coverage

h1_c_coverage <- model.frame(mediation_cumsum ~ hsigo_ipol_y0 + cowcode + Year + ID_ucdp, 
                             data = d_h1_c)

table(h1_c_coverage$mediation_cumsum)
table(ifelse(h1_c_coverage$mediation_cumsum == 0, 0, ifelse(h1_c_coverage$mediation_cumsum > 1, 2, 1)))

# Summary statistics

h1_c_sumstats <- dplyr::select(dplyr::filter(d_c, (cwm_sample == 1 | mic_sample == 1) & is.na(mediation) == FALSE & is.na(hsigo_ipol_y0) == FALSE),
                               `Total mediation years` = mediation_cumsum,
                               `Memberships in IGOs with high economic leverage, before conflict` = hligo_ipol_y0,
                               `Memberships in other HSIGOs` = hsigo_other_y0,
                               `Battle deaths throughout conflict (logged)` = bdbest_sum_log,
                               `Conflict duration (years logged)` = conflictyears_log,
                               `Territorial conflict` = territorial_conflict,
                               `Natural resources` = gems_drugs_oil,
                               `Population before conflict (logged)` = population_log_y0,
                               `Democracy before conflict` = vdem_polyarchy_ctr_y0,
                               `GDP per capita before conflict (logged)` = gdppc2005_log_y0,  
                               `Trade globalization before conflict` = ai_df_std,
                               `UNGA proximity to US before conflict` = pivv_us_std,
                               `COW code` = cowcode,
                               `Year` = Year)

h1_c_sumstats <- as.data.frame(h1_c_sumstats)
h1_c_sumstats <- dplyr::select(h1_c_sumstats, -`COW code`, -`Year`)

datasummary(formula = All(h1_c_sumstats) ~ Mean + SD + Min + Median + Max + N,
            data = h1_c_sumstats,
            output = "Output/Tables/h1_c_m1_med_1_sumstats_all.tex",
            title = "Summary statistics for analyses of mediation at the conflict; all observations.",
            notes = "")


h1_c_m1_sumstats <- dplyr::select(dplyr::filter_at(d_c, vars((colnames(model.frame(h1_c_m1_med_1_lev)))), all_vars(!is.na(.) & (cwm_sample == 1 | mic_sample == 1))),
                                  `Total mediation years` = mediation_cumsum,
                                  `Memberships in IGOs with high economic leverage, before conflict` = hligo_ipol_y0,
                                  `Memberships in other HSIGOs` = hsigo_other_y0,
                                  `Battle deaths throughout conflict (logged)` = bdbest_sum_log, 
                                  `Conflict duration (years logged)` = conflictyears_log,
                                  `Territorial conflict` = territorial_conflict,
                                  `Natural resources` = gems_drugs_oil, 
                                  `Population before conflict (logged)` = population_log_y0,
                                  `Democracy before conflict` = vdem_polyarchy_ctr_y0,
                                  `GDP per capita before conflict (logged)` = gdppc2005_log_y0,
                                  `Trade globalization before conflict` = ai_df_std,
                                  `UNGA proximity to US before conflict` = pivv_us_std,
                                  `COW code` = cowcode,
                                  `Year` = Year )

# Table with IGO memberships

h1_c_m1_sumstats$Country <- countrycode::countrycode(sourcevar = h1_c_m1_sumstats$`COW code`, origin = "cown", destination = "country.name")
country_table <- select(h1_c_m1_sumstats, Country, Year, `Memberships in IGOs with high economic leverage, before conflict`)

print(xtable(x = ungroup(country_table), 
       caption = "List of countries with conflicts in the data and their IGO membership counts",
       label = "tab:conflict_list",
       align = "llll"),
      type = "latex",
      file = "Output/Tables/tab_conflict_list.tex",
      caption.placement = "top",
      include.rownames = FALSE,
      )
      
h1_c_m1_sumstats <- as.data.frame(h1_c_m1_sumstats)
h1_c_m1_sumstats <- dplyr::select(h1_c_m1_sumstats, -`COW code`, -`Year`, -`Country`)

datasummary(formula = All(h1_c_m1_sumstats) ~ Mean + SD + Min + Median + Max + N,
            data = h1_c_m1_sumstats,
            output = "Output/Tables/h1_c_m1_med_1_sumstats_sample.tex",
            title = "Summary statistics for analyses of mediation at the conflict; estimation sample only.",
            notes = "")


# Effect plots

p_h1_c_m1_med_1_lev <- predictions(
  mod = h1_c_m1_med_1_lev,
  type = "response",
  vcov = ~ region23,
  conf_level = 0.834,
  newdata = datagrid(hligo_ipol_y0 = 1:9, grid_type = "counterfactual"))
p_h1_c_m1_med_1_lev_df <- tidy(p_h1_c_m1_med_1_lev, by = "hligo_ipol_y0", conf_level = 0.834)

h1_m1_c_med_1_lev_rug <- data.frame(hligo_ipol_y0 = model.frame(h1_c_m1_med_1_lev)[, 2])
h1_m1_c_med_1_lev_rug_long <- pivot_longer(data = h1_m1_c_med_1_lev_rug, cols = everything(), names_to = "variable", values_to = "value")
h1_m1_c_med_1_lev_rug_long <- dplyr::summarize(group_by(h1_m1_c_med_1_lev_rug_long, value), count = n())

h1_m1_c_med_1_lev.eff.p <- ggplot(data = p_h1_c_m1_med_1_lev_df, 
                                  aes(x = hligo_ipol_y0, y = estimate)) +
  geom_segment(aes(y = conf.low, yend = conf.high, xend = hligo_ipol_y0), alpha = 1) + 
  geom_line(aes(group = 1), linewidth = 0.5, lineend = "round") + 
  geom_point(color = "black") +
  xlab("Memberships in IGOs with high economic leverage") + 
  ylab("Predicted count of mediation years during conflict") +
  scale_x_continuous(breaks = c(0, 5, 10), limits = c(0, 10)) +
  theme_jk()

h1_m1_c_med_1_lev.hist.p <- ggplot(h1_m1_c_med_1_lev_rug_long, aes(x = value, y = count)) +   
  geom_bar(position = "dodge", stat = "identity", fill = "black") + 
  guides(color = "none") + 
  xlab("Memberships in IGOs with high economic leverage") + 
  ylab("Count") + 
  scale_x_continuous(breaks = c(0, 5, 10), limits = c(0, 10)) +
  theme_jk()

# cairo_pdf("Output/Figures/h1_h1c_m1_hligo_eff.pdf", width = 9, height = 7, )
pdf("Output/Figures/h1_h1c_m1_hligo_eff.pdf", width = 9, height = 7)
cowplot::plot_grid(h1_m1_med_1_lev.eff.p + labs(title = "Predicted probability of Mediation\nin a given year"), h1_m1_c_med_1_lev.eff.p + labs(title = "Predicted count of years\nwith mediation throughout conflict "),
                   h1_m1_med_1_lev.hist.p, h1_m1_c_med_1_lev.hist.p, 
                   ncol = 2, align = "v",
                   rel_heights = c(4, 2),
                   labels = c("(a)", "(b)"))
dev.off()

#################################
### H2: Mediation efforts are more likely to end in comprehensive peace agreements 
### after domestic armed conflicts in countries that are more involved in HSIGOs, 
### compared to conflicts in conflicts in countries that are less involved in HSIGOs
#################################

d_ts <- rio::import("Data/HSIGO-CPA_cy_ts.csv")
# y = pam_agneg2 (whether or not the given observation is the year in which the agreement was first negotiated)
d_h2_c <- d_h1_c

summary(h2_c_m1_pa_lev_1 <- glm(pam_agneg2 ~ hligo_ipol_lag * mediation_cumsum_log_lag + 
                                  bdbest_sum_log + conflictyears_log + territorial_conflict + gems_drugs_oil + exclpop_std + 
                                  population_log_lag + vdem_polyarchy_ctr_lag + gdppc2005_log_lag + UNPKO_yes_lag, 
                                data = d_h2_c, family = binomial(link = "probit")))

coefmap <- c("hligo_ipol_lag" = "IGOs with high economic leverage",
             "bdbest_sum_log" = "Battle deaths throughout conflict (logged)",
             "mediation_cumsum_log_lag" = "Prior years with mediation (logged)",
             "conflictyears_log" = "Conflict duration (years logged)",
             "territorial_conflict" = "Territorial conflict",
             "gems_drugs_oil " = "Natural resources",
             "exclpop_std" = "Excluded population",
             "population_log_lag" = "Population before conflict (logged)",
             "vdem_polyarchy_ctr_lag" = "Democracy before conflict",
             "gdppc2005_log_lag" = "GDP per capita before conflict (logged)",
             "UNPKO_yes_lag" = "UNPKO",
             "hligo_ipol_lag:mediation_cumsum_log_lag" = "IGOs x Prior years with mediation (logged)")

# IC comparison

predict(h2_c_m1_pa_lev_1, newdata = data.frame(hligo_ipol_lag = c(3, 5, 8), mediation_cumsum_log_lag = log(6),
                                               bdbest_sum_log = median(model.frame(h2_c_m1_pa_lev_1)$bdbest_sum_log, na.rm = TRUE), 
                                               conflictyears_log = median(model.frame(h2_c_m1_pa_lev_1)$conflictyears_log, na.rm = TRUE), 
                                               territorial_conflict = median(model.frame(h2_c_m1_pa_lev_1)$territorial_conflict, na.rm = TRUE), 
                                               gems_drugs_oil = median(model.frame(h2_c_m1_pa_lev_1)$gems_drugs_oil, na.rm = TRUE), 
                                               exclpop_std = median(model.frame(h2_c_m1_pa_lev_1)$exclpop_std, na.rm = TRUE),
                                               population_log_lag = median(model.frame(h2_c_m1_pa_lev_1)$population_log_lag, na.rm = TRUE), 
                                               vdem_polyarchy_ctr_lag = median(model.frame(h2_c_m1_pa_lev_1)$vdem_polyarchy_ctr_lag, na.rm = TRUE), 
                                               gdppc2005_log_lag = median(model.frame(h2_c_m1_pa_lev_1)$gdppc2005_log_lag, na.rm = TRUE), 
                                               UNPKO_yes_lag = median(model.frame(h2_c_m1_pa_lev_1)$UNPKO_yes_lag, na.rm = TRUE)), 
        type = "response", se.fit = TRUE)

# IC comparison end

modelsummary(h2_c_m1_pa_lev_1, 
             output = "Output/Tables/h2_c_m1_pa_lev_1.tex",
             estimate = "estimate",
             statistic = "std.error",
             vcov = ~ region23, 
             stars = c("*" = .05),
             coef_map = coefmap,
             gof_omit = "AIC|BIC|RMSE",
             # align = "c",
             notes = "Standard errors, clustered by region, in parentheses.",
             title = "Evidence on H2, using IGOs with high economic leverage: Probit estimates of CPA signings, 1989–2011. Unit of analysis: conflict."
)

# Imputation

set.seed(201907)
h2_c_m1_pa_1_imp <- Amelia::amelia(x = data.frame(dplyr::select(d_c[d_c$cwm_sample == 1 | d_c$mic_sample == 1, ], cowcode, pam_agneg2,
                                                                mediation_cumsum_log_lag, hsigo_ipol_lag, hligo_ipol_lag, bdbest_sum_log, conflictyears_log, territorial_conflict,
                                                                gems_drugs_oil, exclpop_std, population_log_lag, democracy6_lag, chga_demo_lag, vdem_polyarchy_ctr_lag,
                                                                gdppc2005_log_lag, tradegdp01_lag, fdi_stock_log_lag, ai_df_std, aii_df_std, aid_commitment_lag_mill_std,
                                                                UNPKO_yes_lag, UNPKO_yes_lag, pivv_us_std)),
                                   cs = "cowcode")

set.seed(201907)
h2_c_m1_pa_lev_1_zelig <- Zelig::zelig(pam_agneg2 ~ hligo_ipol_lag * mediation_cumsum_log_lag + 
                                         bdbest_sum_log + conflictyears_log + territorial_conflict + gems_drugs_oil + exclpop_std + 
                                         population_log_lag + vdem_polyarchy_ctr_lag + gdppc2005_log_lag + UNPKO_yes_lag, 
                                       model = "probit", data = h2_c_m1_pa_1_imp)

texreg(list(Zelig::from_zelig_model(h2_c_m1_pa_lev_1_zelig)[[1]]),
       file = "Output/Tables/h2_c_m1_pa_lev_1_imp.tex",
       single.row = FALSE,
       stars = c(0.05),
       custom.coef.names = c("Intercept", "IGOs with high economic leverage", "Prior years with mediation (logged)", "Battle deaths in prior year (logged)", 
                             "Conflict duration (years logged)", "Territorial conflict", "Natural resources", "Excluded population",
                             "Population (logged)", "Democracy", "GDP per capita (logged)", "UNPKO", "HSIGO memberships times Prior years with mediation (logged)"),
       override.coef = Zelig::combine_coef_se(h2_c_m1_pa_lev_1_zelig)[, 1], 
       override.se = Zelig::combine_coef_se(h2_c_m1_pa_lev_1_zelig)[, 2], 
       override.pvalues = Zelig::combine_coef_se(h2_c_m1_pa_lev_1_zelig)[, 4],
       reorder.coef = c(2, 3, length(h2_c_m1_pa_lev_1$coefficients), 4:(length(h2_c_m1_pa_lev_1$coefficients)-1), 1),
       center = TRUE,
       caption = "Evidence on H2, using IGOs with high economic leverage: Probit estimates of CPA signings, 1989--2011. Unit of analysis: conflict. Estimates after multiple imputation.",
       caption.above = TRUE,
       label = "tab:",
       booktabs = TRUE,
       dcolumn = TRUE,
       use.packages = FALSE,
       fontsize = "scriptsize")

# Effect plots for HLIGOs

h2_c_m1_pa_lev_1_simdat <- datagrid(mediation_cumsum_log_lag = log(c(0, 1, 5, 10, 15) + 1), 
                                    hligo_ipol_lag = c(3, 5, 7),
                                    model = h2_c_m1_pa_lev_1,
                                    grid_type = "typical")

h2_c_m1_pa_lev_1_simdat_pred <- predictions(model = h2_c_m1_pa_lev_1,
                                            newdata = h2_c_m1_pa_lev_1_simdat,
                                            vcov = ~ region23,
                                            conf.level = 0.834)

h2_c_m1_pa_lev_1_simdat_pred$mediation_cumsum_lag <- factor(h2_c_m1_pa_lev_1_simdat_pred$mediation_cumsum_log_lag, levels = sort(unique(h2_c_m1_pa_lev_1_simdat_pred$mediation_cumsum_log_lag)), labels = c(0, 1, 5, 10, 15))
h2_c_m1_pa_lev_1_simdat_pred$hligo_ipol_lag_factor <- factor(as.character(h2_c_m1_pa_lev_1_simdat_pred$hligo_ipol_lag), labels = c("3 IGO memberships", "5 IGO memberships\n(sample mean)", "7 IGO memberships"), ordered = TRUE)

h2_c_m1_pa_lev_1_eff_p <- ggplot(data = h2_c_m1_pa_lev_1_simdat_pred, 
                                 aes(x = mediation_cumsum_lag, y = estimate)) +
  geom_segment(aes(y = conf.low, yend = conf.high, xend = mediation_cumsum_lag), color = "black") + 
  facet_wrap(~ hligo_ipol_lag_factor, ncol = 3) + 
  geom_line(aes(x = mediation_cumsum_lag, y = estimate, group = 1), linewidth = 0.5, lineend = "round") + 
  geom_point(color = "black") +
  labs(x = "Prior conflict years with mediation\n(back-transformed from logged count)", 
       y = "Probability of CPA signing",
       caption = "Predictions based on typical values\nN = 168 conflicts\nPrediction errors clustered by region") +
  theme_jk()

ggsave(h2_c_m1_pa_lev_1_eff_p, file = "Output/Figures/h2_c_m1_pa_lev_1_eff.pdf",
       width = 10, height = 5) # , device = cairo_pdf)
ggsave(h2_c_m1_pa_lev_1_eff_p, file = "Output/Figures/h2_c_m1_pa_lev_1_eff.eps",
       width = 10, height = 5) 
ggsave(h2_c_m1_pa_lev_1_eff_p, file = "Output/Figures/h2_c_m1_pa_lev_1_eff.jpg",
       device = "jpg", dpi = 350, width = 10, height = 5) 

# Effect plot for HLIGOs based on model with imputed data,
# using simulation (as I do below after biprobit)

h1h2_hligo_zelig_coef <- Zelig::combine_coef_se(h2_c_m1_pa_lev_1_zelig)[, 1]

h1h2_hligo_zelig_vcov <- Zelig::vcov(h2_c_m1_pa_lev_1_zelig)[[1]]
detach(package:Zelig)

h2_c_m1_pa_lev_1_simdat$`hligo_ipol_lag:mediation_cumsum_log_lag` <- h2_c_m1_pa_lev_1_simdat$hligo_ipol_lag * h2_c_m1_pa_lev_1_simdat$mediation_cumsum_log_lag
h2_c_m1_pa_lev_1_simdat$`(Intercept)` <- 1
h2_c_m1_pa_lev_1_simdat <- h2_c_m1_pa_lev_1_simdat[, c("(Intercept)", "hligo_ipol_lag", "mediation_cumsum_log_lag", 
                                                       "bdbest_sum_log", "conflictyears_log", "territorial_conflict", 
                                                       "gems_drugs_oil", "exclpop_std", "population_log_lag", "vdem_polyarchy_ctr_lag", 
                                                       "gdppc2005_log_lag", "UNPKO_yes_lag", "hligo_ipol_lag:mediation_cumsum_log_lag"
)]

# bX

set.seed(123)
h1h2_hligo_zelig_eq2_sim <- MASS::mvrnorm(n = 1000, mu = h1h2_hligo_zelig_coef, Sigma = h1h2_hligo_zelig_vcov)

h1h2_hligo_zelig_simpp_hligo <- pnorm(t(data.matrix(h2_c_m1_pa_lev_1_simdat) %*% t(h1h2_hligo_zelig_eq2_sim)))

colnames(h1h2_hligo_zelig_simpp_hligo) <- as.character(c(1:15))
h1h2_hligo_zelig_simpp_hligo_long <- h1h2_hligo_zelig_simpp_hligo |> 
  as.data.frame() |> 
  pivot_longer(cols = everything(), names_to = "rowid", values_to = "predicted")
h1h2_hligo_zelig_simpp_hligo_long$hligo_ipol_lag = rep(h2_c_m1_pa_lev_1_simdat$hligo_ipol_lag, 1000)
h1h2_hligo_zelig_simpp_hligo_long$mediation_cumsum_log_lag = rep(h2_c_m1_pa_lev_1_simdat$mediation_cumsum_log_lag, 1000)

h1h2_hligo_zelig_simpp_hligo_dat <- h1h2_hligo_zelig_simpp_hligo_long |> 
  group_by(hligo_ipol_lag, mediation_cumsum_log_lag) |> 
  dplyr::summarize(
    pred = quantile(predicted, probs = 0.5),
    conf.low = quantile(predicted, probs = 0.083),
    conf.high = quantile(predicted, probs = 0.917))

h1h2_hligo_zelig_simpp_hligo_dat$predicted <- h1h2_hligo_zelig_simpp_hligo_dat$pred

h1h2_hligo_zelig_simpp_hligo_dat$mediation_cumsum_lag <- factor(h1h2_hligo_zelig_simpp_hligo_dat$mediation_cumsum_log_lag, levels = sort(unique(h1h2_hligo_zelig_simpp_hligo_dat$mediation_cumsum_log_lag)), labels = c(0, 1, 5, 10, 15))
h1h2_hligo_zelig_simpp_hligo_dat$hligo_ipol_lag_factor <- factor(as.character(h1h2_hligo_zelig_simpp_hligo_dat$hligo_ipol_lag), labels = c("3 IGO memberships", "5 IGO memberships\n(sample mean)", "7 IGO memberships"), ordered = TRUE)

h2_c_m1_pa_lev_1_zelig_eff_p <- ggplot(data = h1h2_hligo_zelig_simpp_hligo_dat, 
                                       aes(x = mediation_cumsum_lag, y = predicted)) +
  geom_segment(aes(y = conf.low, yend = conf.high, xend = mediation_cumsum_lag), color = "black") + 
  facet_wrap(~ hligo_ipol_lag_factor, ncol = 3) + 
  geom_line(aes(x = mediation_cumsum_lag, y = predicted, group = 1), linewidth = 0.5, lineend = "round") + 
  geom_point(color = "black") +
  labs(x = "Prior conflict years with mediation\n(back-transformed from logged count)", 
       y = "Probability of CPA signing",
       caption = "Predictions based on typical values\nN = 185 conflicts\nPrediction errors clustered by region") +
  theme_jk()

ggsave(h2_c_m1_pa_lev_1_zelig_eff_p, file = "Output/Figures/h2_c_m1_pa_lev_1_zelig_eff.pdf",
       width = 10, height = 5) #, device = cairo_pdf)

# Summary statistics

h1_h3_c_sumstats <-  d_c[(d_c$cwm_sample == 1 | d_c$mic_sample == 1) & is.na(d_c$pam_agneg2) == FALSE & is.na(d_c$hsigo_ipol_lag) == FALSE, c("mediation_cumsum", "pam_agneg2", "hsigo_ipol_lag", "hligo_ipol_lag", "mediation_cumsum_lag", "bdbest_sum_log", "conflictyears_log", "territorial_conflict", "gems_drugs_oil", "exclpop_std", "population_log_lag", "vdem_polyarchy_ctr_lag", "gdppc2005_log_lag", "ai_df_std", "UNPKO_yes_lag", "ext_int_mapo", "pivv_us_std", "cowcode", "Year", "pam_caseid", "pam_country", "pam_accord_name")]

# List of CPAs

arrange(h1_h3_c_sumstats[h1_h3_c_sumstats$pam_agneg2 == 1, c("pam_caseid", "pam_country", "pam_accord_name")], pam_caseid)
pam_original <- rio::import("Data/PAM_ID V.1.5 Updated 29JULY2015.xlsx")
cpa_list <- pam_original[pam_original$pam_caseid %in% h1_h3_c_sumstats[h1_h3_c_sumstats$pam_agneg2 == 1, ]$pam_caseid & pam_original$year_count == 1, c("country", "accord name")]
cpa_list <- cpa_list[is.na(cpa_list$country) == FALSE, ]
cpa_list$`accord name` <- gsub(pattern = " ,", replacement = ",", x = cpa_list$`accord name`)
names(cpa_list) <- c("Country", "CPA name and date")
cpa_list <- arrange(cpa_list, Country)

cpa_list_xtable <- xtable(cpa_list, align = "lll", caption = "List of CPAs (used in analyses of H2 and H3)")
print(cpa_list_xtable,
      type = "latex",
      file = "Output/Tables/cpa_list.tex",
      caption.placement = "top",
      latex.environments = "center",
      size = "scriptsize",
      include.rownames = FALSE,
      include.colnames = TRUE,
      booktabs = TRUE)


##########################
### Descriptive statistics to address selection etc.
##########################

# HSIGO memberships for all countries (raw hsigo data) vs. CW countries

conflict_cases <- d_ts[d_ts$cwm_sample == 1 | d_ts$mic_sample == 1, c("cowcode", "Year", "ID_ucdp_year", "conflict", "mediation", "pam_agneg2")]
conflict_cases <- mutate(group_by(conflict_cases, cowcode, Year),
                         counter = 1:n())
conflict_cases <- conflict_cases[conflict_cases$counter == 1, ]
conflict_cases$conflict_obs <- 1
all_cases <- rio::import("Data/hsigo_cow_1945-2015_ipol.csv")
igo_profile <- merge(x = conflict_cases, y = all_cases, by.x = c("cowcode", "Year"), by.y = c("cown", "year"), all.y = TRUE)
igo_profile <- igo_profile[igo_profile$Year >= 1989 & igo_profile$Year <= 2011, ]
igo_profile$`Conflict case` <- ifelse(igo_profile$conflict_obs == 1, "Yes", "No")
igo_profile$`Conflict case` <- ifelse(is.na(igo_profile$conflict_obs) == TRUE, "No", igo_profile$`Conflict case`)
igo_profile$continent <- countrycode::countrycode(sourcevar = igo_profile$cowcode, origin = "cown", destination = "continent")
igo_profile[igo_profile$cowcode == 260 | igo_profile$cowcode == 265 | igo_profile$cowcode == 315 | igo_profile$cowcode == 345, ]$continent <- "Europe"
igo_profile[igo_profile$cowcode == 678 | igo_profile$cowcode == 680 | igo_profile$cowcode == 816, ]$continent <- "Asia"

igo_profile$region <- countrycode::countrycode(sourcevar = igo_profile$cowcode, origin = "cown", destination = "region")
igo_profile[igo_profile$cowcode == 260 | igo_profile$cowcode == 265, ]$region <- "Western Europe"
igo_profile[igo_profile$cowcode == 315 | igo_profile$cowcode == 345, ]$region <- "Eastern Europe"
igo_profile[igo_profile$cowcode == 678 | igo_profile$cowcode == 680 | igo_profile$cowcode == 816, ]$region <- "Western Asia"
# igo_profile[igo_profile$cowcode == 816, ]$region <- "South-Eastern Asia"

p1 <- ggplot(data = igo_profile, aes(x = hsigo_ipol, fill = `Conflict case`)) + 
  geom_bar(alpha = 0.8, position = "stack") + 
  scale_fill_viridis(discrete = TRUE) + 
  theme_jk() +
  theme(legend.position = c(0.8, 0.8)) + 
  xlab("HSIGO memberships") + ylab("") + 
  labs(title = "HSIGOs:\nComparison between countries that did and\ndid not experience conflicts", caption = "Observations: all country-years, 1989-2015", fill = "Experienced domestic\narmed conflict") 

ggsave(p1, file = "Output/Figures/hsigo_bars.pdf", width = 6, height = 4) #, device = cairo_pdf)

p1a <- ggplot(data = igo_profile, aes(x = hligo_ipol, fill = `Conflict case`)) + 
  geom_bar(alpha = 0.8, position = "stack") + 
  scale_fill_viridis(discrete = TRUE) + 
  theme_jk() +
  theme(legend.position = c(0.8, 0.8)) + 
  xlab("IGOs with high economic leverage") + ylab("") + 
  scale_x_continuous(breaks = c(0, 2, 4, 6, 8, 10)) +
  labs(title = "IGOs with high economic leverage: comparison between\ncountries that did and did not experience conflicts", caption = "Observations: all country-years, 1989-2015", fill = "Experienced domestic\narmed conflict") 

ggsave(p1a, file = "Output/Figures/hligo_bars.pdf", width = 6, height = 4) #, device = cairo_pdf)

p4a <- ggplot(data = igo_profile[igo_profile$region != "Australia and New Zealand" & 
                                  igo_profile$region != "Micronesia" & 
                                  igo_profile$region != "Polynesia" & 
                                  igo_profile$region != "Western Europe" & 
                                  igo_profile$region != "Eastern Asia" & 
                                  igo_profile$region != "Southern Africa", ], aes(x = hligo_ipol, fill = `Conflict case`)) + 
  geom_bar(alpha = 1, position = "stack") + 
  facet_wrap(~ region, scales = "free_y") + 
  scale_fill_viridis(discrete = TRUE) + 
  scale_y_continuous(breaks = NULL) + 
  theme_jk() +
  theme(legend.position = "bottom") + 
  xlab("Memberships in IGOs with high economic leverage") + ylab("") +
  labs(title = "Memberships in IGOs with high economic leverage:\ncomparison between countries that did\nand did not experience conflicts", caption = "Observations: all country-years, 1989-2015", fill = "Experienced domestic\narmed conflict") 

ggsave(p4a, file = "Output/Figures/hligo_region_bar.pdf", width = 8, height = 8.5) # , device = cairo_pdf)

# IGO distribution over time etc. but oly in the mediation data

conflict_igos <- dplyr::summarize(group_by(igo_profile[igo_profile$`Conflict case` == "Yes", ], Year),
                           hsigo_ipol_lag_mean = mean(hsigo_ipol_lag, na.rm = TRUE),
                           hsigo_cb_ipol_lag_mean = mean(hsigo_cb_ipol_lag, na.rm = TRUE),
                           hligo_ipol_lag_mean = mean(hligo_ipol_lag, na.rm = TRUE),
                           hsigo_ipol_lag_lo = quantile(hsigo_ipol_lag, probs = 0.25, na.rm = TRUE),
                           hsigo_cb_ipol_lag_lo = quantile(hsigo_cb_ipol_lag, probs = 0.25, na.rm = TRUE),
                           hsigo_ipol_lag_hi = quantile(hsigo_ipol_lag, probs = 0.75, na.rm = TRUE),
                           hsigo_cb_ipol_lag_hi = quantile(hsigo_cb_ipol_lag, probs = 0.75, na.rm = TRUE))

noconflict_igos <- dplyr::summarize(group_by(igo_profile[igo_profile$`Conflict case` == "No" & igo_profile$Year >= 1989 & igo_profile$Year <= 2011, ], Year),
                             hsigo_ipol_lag_mean = mean(hsigo_ipol_lag, na.rm = TRUE),
                             hsigo_cb_ipol_lag_mean = mean(hsigo_cb_ipol_lag, na.rm = TRUE),
                             hsigo_cb_ipol_lag_mean = mean(hsigo_cb_ipol_lag, na.rm = TRUE),
                             hligo_ipol_lag_mean = mean(hligo_ipol_lag, na.rm = TRUE),
                             hsigo_ipol_lag_lo = quantile(hsigo_ipol_lag, probs = 0.25, na.rm = TRUE),
                             hsigo_cb_ipol_lag_lo = quantile(hsigo_cb_ipol_lag, probs = 0.25, na.rm = TRUE),
                             hsigo_ipol_lag_hi = quantile(hsigo_ipol_lag, probs = 0.75, na.rm = TRUE),
                             hsigo_cb_ipol_lag_hi = quantile(hsigo_cb_ipol_lag, probs = 0.75, na.rm = TRUE))

p1 <- ggplot(data = conflict_igos, aes(x = Year, y = hsigo_ipol_lag_mean)) + 
  geom_line(color = viridis(n = 3)[1]) + 
  # geom_line(aes(y = hsigo_ipol_lag_lo), color = viridis(n = 3)[1], linetype = "dashed") + 
  # geom_line(aes(y = hsigo_ipol_lag_hi), color = viridis(n = 3)[1], linetype = "dashed") + 
  # geom_ribbon(aes(ymin = hsigo_ipol_lag_lo, ymax = hsigo_ipol_lag_hi), fill = viridis(n = 3)[1], alpha = 0.25) +
  geom_line(data = noconflict_igos, aes(x = Year, y = hsigo_ipol_lag_mean), color = viridis(n = 3)[3]) +
  # geom_line(data = noconflict_igos, aes(x = Year, y = hsigo_ipol_lag_lo), color = viridis(n = 3)[2], linetype = "dashed") +
  # geom_line(data = noconflict_igos, aes(x = Year, y = hsigo_ipol_lag_hi), color = viridis(n = 3)[2], linetype = "dashed") + 
  # geom_ribbon(data = noconflict_igos, aes(ymin = hsigo_ipol_lag_lo, ymax = hsigo_ipol_lag_hi), fill = viridis(n = 3)[2], alpha = 0.25) + 
  ylim(0, NA) + ylab("Average memberships\nin highly structured IGOs") + 
  annotate(geom = "text", x = 1993, y = 16, label = "Country-years without conflict", color = viridis(n = 3)[1]) + 
  annotate(geom = "text", x = 1996, y = 14, label = "Country-years with conflict", color = viridis(n = 3)[3]) +
  theme_jk()

ggsave(p1, file = "Output/Figures/hsigo_timeline.pdf", width = 6, height = 3) #, device = cairo_pdf)

p2 <- ggplot(data = conflict_igos, aes(x = Year, y = hligo_ipol_lag_mean)) + 
  geom_line(color = viridis(n = 3)[1]) + 
  # geom_line(aes(y = hsigo_ipol_lag_lo), color = viridis(n = 3)[1], linetype = "dashed") + 
  # geom_line(aes(y = hsigo_ipol_lag_hi), color = viridis(n = 3)[1], linetype = "dashed") + 
  # geom_ribbon(aes(ymin = hsigo_ipol_lag_lo, ymax = hsigo_ipol_lag_hi), fill = viridis(n = 3)[1], alpha = 0.25) +
  geom_line(data = noconflict_igos, aes(x = Year, y = hligo_ipol_lag_mean), color = viridis(n = 3)[3]) +
  # geom_line(data = noconflict_igos, aes(x = Year, y = hsigo_ipol_lag_lo), color = viridis(n = 3)[2], linetype = "dashed") +
  # geom_line(data = noconflict_igos, aes(x = Year, y = hsigo_ipol_lag_hi), color = viridis(n = 3)[2], linetype = "dashed") + 
  # geom_ribbon(data = noconflict_igos, aes(ymin = hsigo_ipol_lag_lo, ymax = hsigo_ipol_lag_hi), fill = viridis(n = 3)[2], alpha = 0.25) + 
  ylim(0, NA) + ylab("Average memberships\nin memberships in\nIGOs with high economic leverage") + 
  annotate(geom = "text", x = 2005, y = 4.5, label = "Country-years without conflict", color = viridis(n = 3)[1]) + 
  annotate(geom = "text", x = 2005, y = 5.5, label = "Country-years with conflict", color = viridis(n = 3)[3]) +
  theme_jk()

ggsave(p2, file = "Output/Figures/hligo_timeline.pdf", width = 6, height = 3) # , device = cairo_pdf)

# Mediation over time

conflict_cases <- d_ts[d_ts$cwm_sample == 1 | d_ts$mic_sample == 1, c("cowcode", "Year", "ID_ucdp_year", "conflict", "mediation", "pam_agneg2")]
conflict_cases <- mutate(group_by(conflict_cases, cowcode, Year),
                         counter = 1:n())

conflict_cases_year <- dplyr::summarize(group_by(conflict_cases, Year),
                                 conflict_sum = sum(conflict, na.rm = TRUE),
                                 mediation_sum = sum(mediation, na.rm = TRUE),
                                 cpa_sum = sum(pam_agneg2, na.rm = TRUE))
names(conflict_cases_year) <- c("Year", "Conflict years", "Conflict-years with mediation", "CPAs signed")

conflict_cases_year_long <- gather(conflict_cases_year, key = "Outcome", value = "value", -Year)

p1 <- ggplot(data = conflict_cases_year_long, aes(x = Year, y = value, fill = Outcome)) + 
  geom_col(position = "stack") + 
  xlab("") + ylab("Country-years") + 
  theme_jk() + scale_fill_viridis(discrete = TRUE, direction = -1) + 
  theme(legend.position = "bottom")

ggsave(p1, file = "Output/Figures/mediation_time_stacked.pdf", width = 6, height = 4) # , device = cairo_pdf)

p2 <- ggplot(data = conflict_cases_year_long, aes(x = Year, y = value)) + 
  geom_col() + 
  facet_wrap(~ Outcome, ncol = 1, scales = "free_y") +
  xlab("") + ylab("Country-years") + 
  theme_jk()

ggsave(p2, file = "Output/Figures/mediation_time_panels.pdf", width = 6, height = 5) # , device = cairo_pdf)

# Mediation by region

conflict_cases$continent <- countrycode::countrycode(sourcevar = conflict_cases$cowcode, origin = "cown", destination = "continent")
conflict_cases[conflict_cases$cowcode == 345, ]$continent <- "Europe"
conflict_cases[conflict_cases$cowcode == 678, ]$continent <- "Asia"

conflict_cases$region <- countrycode::countrycode(sourcevar = conflict_cases$cowcode, origin = "cown", destination = "region")
conflict_cases[conflict_cases$cowcode == 345, ]$region <- "Eastern Europe"
conflict_cases[conflict_cases$cowcode == 678, ]$region <- "Western Asia"

conflict_cases_year_continent <- dplyr::summarize(group_by(conflict_cases, Year, continent),
                                           conflict_sum = sum(conflict, na.rm = TRUE),
                                           mediation_sum = sum(mediation, na.rm = TRUE),
                                           cpa_sum = sum(pam_agneg2, na.rm = TRUE))
names(conflict_cases_year_continent) <- c("Year", "Continent", "Conflict-years", "Conflict-years with mediation", "CPAs signed")

conflict_cases_year_continent_long <- gather(conflict_cases_year_continent, key = "Outcome", value = "value", -Year, -Continent)

p1 <- ggplot(data = conflict_cases_year_continent_long, aes(x = Year, y = value, fill = Outcome)) + 
  geom_col(position = "stack") + 
  facet_wrap(~ Continent, ncol = 1) +
  xlab("") + ylab("Country-years") + 
  ggthemes::theme_base() + scale_fill_viridis(discrete = TRUE, direction = -1) + 
  theme(legend.position = "bottom", legend.title = element_blank())

ggsave(p1, file = "Output/Figures/mediation_time_continent.pdf", width = 6.5, height = 7) #, device = cairo_pdf)

##########################
### Compare Ivory Coast to other cases 
########################## 

ic_comp <- select(d_c[d_c$cwm_sample == 1 | d_c$mic_sample == 1, ], 
                  pam_agneg2, mediation_cumsum_log_lag, hligo_ipol_lag_std, hsigo_ipol_lag_std, 
                  bdbest_sum_log, conflictyears_log, territorial_conflict,
                  gems_drugs_oil, exclpop_std, population_log_lag, vdem_polyarchy_ctr_lag,
                  gdppc2005_log_lag, ai_df_std, UNPKO_yes_lag, pivv_us_std, pam_caseid)
ic_comp$IC <- ifelse(ic_comp$pam_caseid == 11, 1, 0)
ic_comp$IC <- ifelse(is.na(ic_comp$pam_caseid) == TRUE, 0, ic_comp$IC)


ic_comp <- as.data.frame(ic_comp)
ic_comp$ID_ucdp <- NULL
ic_comp$pam_caseid <- NULL

ic_comp$population_log_lag <- meanstd(ic_comp$population_log_lag)
ic_comp$gdppc2005_log_lag <- meanstd(ic_comp$gdppc2005_log_lag)
ic_comp$conflictyears_log <- meanstd(ic_comp$conflictyears_log)
ic_comp$bdbest_sum_log <- meanstd(ic_comp$bdbest_sum_log)
ic_comp$mediation_cumsum_log_lag <- meanstd(ic_comp$mediation_cumsum_log_lag)

names(ic_comp) <- c("CPA signed", "Count of mediation years (logged, standardized)", "Memberships in IGOs with high economic leverage (standardized)", "HSIGO memberships (standardized)", 
                    "Battle deaths (logged, standardized)", "Conflict duration (logged, standardized)", "Territorial conflict", "Natural resources", 
                    "Excluded population (standardized)", "Population (logged, standardized)", "Democracy", "GDP per capita (logged, standardized)", 
                    "Trade globalization (standardized)", "UNPKO", "UNGA proximity to US (standardized)", 
                    "IC")

ic_comp_long <- gather(ic_comp, key = variable, value = value, -IC)
ic_comp_long$variable_pos <- as.numeric(as.factor(ic_comp_long$variable))
ic_comp_long$country <- ifelse(ic_comp_long$IC == 1, "ci", NA)

p2 <- ggplot(data = filter(ic_comp_long, IC == 0), aes(x = value, y = variable)) + 
  geom_density_ridges(color = NA, fill = "#fde725", stat = "binline", bins = 20, scale = 1.1) +
  xlim(-3, 3) + 
  geom_point(data = filter(ic_comp_long, IC == 1), aes(x = value, y = variable_pos + 0.25), color = "#440154", size = 3) + 
  theme_minimal() + 
  theme(text = element_text(color = "black")) + 
  ylab("") + xlab("")

ggsave(p2, file = "Output/Figures/ic_comparison.pdf", width = 6, height = 6) # , device = cairo_pdf)

##########################
### Bivariate probit
########################## 

d_c_sel <- d_c[d_c$cwm_sample == 1 | d_c$mic_sample == 1, ]
d_c_sel$mediation_yes <- ifelse(d_c_sel$mediation_cumsum > 0, 1, 0)
d_c_sel$mediation_yes_fac <- as.factor(d_c_sel$mediation_yes)
d_c_sel$pam_agneg2_fac <- as.factor(d_c_sel$pam_agneg2)

# Bivariate probit 

mediation.hligo.eq <- mediation_yes_fac ~ hligo_ipol_y0 + bdbest_sum_log + conflictyears_log + territorial_conflict + gems_drugs_oil + population_log_y0 + vdem_polyarchy_ctr_y0 + gdppc2005_log_y0 + ai_df_std_y0 + pivv_us_std_y0
out.hligo.eq <- pam_agneg2_fac ~ hligo_ipol_lag*mediation_cumsum_log_lag + bdbest_sum_log + conflictyears_log + territorial_conflict + gems_drugs_oil + exclpop_std + population_log_lag + vdem_polyarchy_ctr_lag + gdppc2005_log_lag + UNPKO_yes_lag

hligo.eq.list <- list(mediation.hligo.eq, out.hligo.eq)

h1h2_hligo_biprobit <- gjrm(formula = hligo.eq.list, 
                            data = d_c_sel, 
                            model = "B",
                            margins = c("probit", "probit"))
summary(h1h2_hligo_biprobit)
h1h2_hligo_biprobit_coef <- h1h2_hligo_biprobit$gam2$coefficients
h1h2_hligo_biprobit_vcov <- h1h2_hligo_biprobit$Vb[12:24, 12:24]

# Tables

# for now, just produce text output for verbatim printing in document

sink("Output/Tables/h2_hligo_biprobit.txt")
summary(h1h2_hligo_biprobit)
sink()

# simulated data for predictions

h2_c_m1_pa_lev_1_simdat$`hligo_ipol_lag:mediation_cumsum_log_lag` <- h2_c_m1_pa_lev_1_simdat$hligo_ipol_lag * h2_c_m1_pa_lev_1_simdat$mediation_cumsum_log_lag
h2_c_m1_pa_lev_1_simdat$`(Intercept)` <- 1
h2_c_m1_pa_lev_1_simdat <- h2_c_m1_pa_lev_1_simdat[, c("(Intercept)", "hligo_ipol_lag", "mediation_cumsum_log_lag", 
                                                       "bdbest_sum_log", "conflictyears_log", "territorial_conflict", 
                                                       "gems_drugs_oil", "exclpop_std", "population_log_lag", "vdem_polyarchy_ctr_lag", 
                                                       "gdppc2005_log_lag", "UNPKO_yes_lag", "hligo_ipol_lag:mediation_cumsum_log_lag"
)]

# bX

set.seed(123)
h1h2_hligo_biprobit_eq2_sim <- MASS::mvrnorm(n = 1000, mu = h1h2_hligo_biprobit_coef, Sigma = h1h2_hligo_biprobit_vcov)
# coefs_heck_stage2 <- MASS::mvrnorm(n = 1000, mu = coef(h1h2_sel_mod)[4:7], Sigma = vcov(h1h2_sel_mod)[4:7, 4:7])

h1h2_hligo_biprobit_simpp_hligo <- pnorm(t(data.matrix(h2_c_m1_pa_lev_1_simdat) %*% t(h1h2_hligo_biprobit_eq2_sim)))

colnames(h1h2_hligo_biprobit_simpp_hligo) <- as.character(c(1:15))
h1h2_hligo_biprobit_simpp_hligo_long <- h1h2_hligo_biprobit_simpp_hligo |> 
  as.data.frame() |> 
  pivot_longer(cols = everything(), names_to = "rowid", values_to = "predicted")
h1h2_hligo_biprobit_simpp_hligo_long$hligo_ipol_lag = rep(h2_c_m1_pa_lev_1_simdat$hligo_ipol_lag, 1000)
h1h2_hligo_biprobit_simpp_hligo_long$mediation_cumsum_log_lag = rep(h2_c_m1_pa_lev_1_simdat$mediation_cumsum_log_lag, 1000)

h1h2_hligo_biprobit_simpp_hligo_dat <- h1h2_hligo_biprobit_simpp_hligo_long |> 
  group_by(hligo_ipol_lag, mediation_cumsum_log_lag) |> 
  dplyr::summarize(
    pred = quantile(predicted, probs = 0.5),
    conf.low = quantile(predicted, probs = 0.083),
    conf.high = quantile(predicted, probs = 0.917))

h1h2_hligo_biprobit_simpp_hligo_dat$predicted <- h1h2_hligo_biprobit_simpp_hligo_dat$pred

h1h2_hligo_biprobit_simpp_hligo_dat$mediation_cumsum_lag <- factor(h1h2_hligo_biprobit_simpp_hligo_dat$mediation_cumsum_log_lag, levels = sort(unique(h1h2_hligo_biprobit_simpp_hligo_dat$mediation_cumsum_log_lag)), labels = c(0, 1, 5, 10, 15))
h1h2_hligo_biprobit_simpp_hligo_dat$hligo_ipol_lag_factor <- factor(as.character(h1h2_hligo_biprobit_simpp_hligo_dat$hligo_ipol_lag), labels = c("3 IGO memberships", "5 IGO memberships\n(sample mean)", "7 IGO memberships"), ordered = TRUE)

h1h2_hligo_biprobit_simpp_eff_p <- ggplot(data = h1h2_hligo_biprobit_simpp_hligo_dat, 
                                          aes(x = mediation_cumsum_lag, y = predicted)) +
  geom_segment(aes(y = conf.low, yend = conf.high, xend = mediation_cumsum_lag), color = "black") + 
  facet_wrap(~ hligo_ipol_lag_factor, ncol = 3) + 
  geom_line(aes(x = mediation_cumsum_lag, y = predicted, group = 1), linewidth = 0.5, lineend = "round") + 
  geom_point(color = "black") +
  labs(x = "Prior conflict years with mediation\n(back-transformed from logged count)", 
       y = "Probability of CPA signing",
       caption = "Predictions based on typical values\nN = 145 conflicts\nPrediction errors clustered by region") +
  theme_jk()

ggsave(h1h2_hligo_biprobit_simpp_eff_p, file = "Output/Figures/h2_c_m1_pa_1_hligo_biprobit_eff.pdf",
       width = 10, height = 5) # , device = cairo_pdf)

